tempfile temp 
set more off
set scheme sonia_new

import excel RegionName UnitedStates using Zillow.xlsx, sheet("Sheet 2") cellrange(A3) clear
drop if _n<3
rename RegionName date
rename UnitedStates Zillow_HPV
gen year = substr(date,1,4)
gen mo   = substr(date,6,2)
drop date
destring year mo Zillow_HPV, replace
save `temp', replace

import excel CaseShiller_historical.xlsx, cellrange(A8) sheet("Data") clear
keep A I
rename A date
rename I CS_historical
gen year = floor(date)
gen mo = ceil((date - floor(date))*12+0.01)
drop if missing(year) | missing(mo)
merge 1:1 year mo using `temp'
tab _merge
drop _merge
save `temp', replace

import excel CaseShiller.xlsx, cellrange(A12) clear
keep A B
rename A date
rename B CS_HPI
gen year = year(date)
gen mo   = month(date)
merge 1:1 year mo using `temp'
tab _merge
drop _merge
save `temp', replace

import excel CPIlessShelter.xlsx, cellrange(A69) clear
rename A date
rename B CPI_less_shelter
gen year = substr(date,1,4)
gen mo   = substr(date,6,2)
drop date
destring year mo CPI_less_shelter, replace
merge 1:1 year mo using `temp'
tab _merge
drop _merge

drop date
gen date = ym(year,mo)
format date %tm


* get rid of extra data
egen min_date = min(date) if ~missing(CPI_less_shelter)
drop if date<min_date

* find the base year for CS
sum date if CS_HPI==100
scalar base=`r(mean)'

* map into Zillow
sum Zillow_HPV if date==base
scalar zillow_scale = `r(mean)'
gen Zillow_HPI = 100*Zillow_HPV/zillow_scale

* map into CS Historical
sum CS_historical if date==base
scalar CS_scale = `r(mean)'
gen CS_historical_ind = 100*CS_historical/CS_scale
* check that they are the same and keep the longer series
twoway (line CS_historical_ind  date,yaxis(2))  (line CS_HPI date) 
drop CS_HPI
gen CS_HPI = CS_historical_ind

* compare the two
twoway (line CS_HPI date) (line Zillow_HPI date)

* deflate both series by CPI
gen CS_HPV = zillow_scale*CS_HPI/100
gen real_CS_HPI = CS_HPI/CPI_less_shelter
gen real_Zillow_HPI = Zillow_HPI/CPI_less_shelter
gen real_CS_HPV = CS_HPV/CPI_less_shelter
gen real_Zillow_HPV = Zillow_HPV/CPI_less_shelter
gen real_CS_HPI12ch = (real_CS_HPI-real_CS_HPI[_n-12])/real_CS_HPI[_n-12]
sum real_CS_HPI12ch
replace real_CS_HPI12ch = real_CS_HPI12ch - `r(mean)'
twoway (line real_CS_HPV date) (line real_Zillow_HPV date)
twoway (line real_CS_HPI12ch date if year>1953) 


*** Yearly ********
preserve 
collapse (mean) real* , by(year)
tsset year
*xtile tertile = hpreal_CS_HPV2, nquantiles(3)
*twoway (tsline hpreal_CS_HPV2) (tsline tertile, yaxis(2)), name(first, replace)

tsfilter hp hpreal_CS_HPI = real_CS_HPI, smooth(6.25)
xtile tertile = hpreal_CS_HPI, nquantiles(3)
bysort tertile: egen tertile_to_plot = mean(hpreal_CS_HPI)
sort year

twoway (connected tertile_to_plot year) (line hpreal_CS_HPI year, ysc(r(0.05 0.07)) ylab(-0.05(0.05)0.05) yti("")) ///
    , xti("Year") legend(lab(2 "Case Shiller adjusted series") lab(1 "Assigned state"))
*twoway (line hpreal_CS_HPI year, ysc(r(0.05 0.07)) ylab(-0.05(0.05)0.05) yti("") lc(navy)) ///
*    (connected tertile_to_plot year, lc(maroon) mc(maroon)), xti("Year") legend(lab(1 "Case Shiller adjusted series") lab(2 "Assigned state"))
graph export Yearly_CS_series.pdf, replace


gen state = .
foreach n in 1 2 3 {
    gen t`n' = 1 if tertile==`n' 
	foreach nlag in 1 2 3 {
	   gen t`nlag'`n' = 1 if tertile[_n-1]==`nlag' & tertile==`n'
	   replace state = `nlag'`n' if tertile[_n-1]==`nlag' & tertile==`n'
	   foreach nlag2 in 1 2 3 {
		   gen t`nlag2'`nlag'`n' = 1 if tertile[_n-2]==`nlag2' & tertile[_n-1]==`nlag' & tertile==`n' 
	   }
	}
}
save year_states, replace
collapse (sum) t* 
drop tertile_to_plot

outsheet using transitiona_matrix_year.csv, replace comma nolabel 
export excel transitiona_matrix_year.xlsx, replace 
restore
